Raw data
- Study summary: To determine the influence of differential Kdm6a expression in immune cells, whole transcriptome analysis for CD4+ T cells from WT and Kdm6a cKO mice were performed using RNA-Seq.
Loading packages
library(data.table)
library(tidyverse)
library(rmarkdown)
library(ggplot2)
library(pheatmap)
library(AnnotationHub)
library(tximport)
library(Rsubread)
library(DESeq2)
library(UpSetR)
Setting AnnotationHub
Assign your species of interest
AnnotationSpecies <- "mus musculus" # Assign your species
ah <- AnnotationHub(hub=getAnnotationHubOption("URL")) # Bring annotation DB
Running AnnotationHub
ahQuery <- query(ah, c("OrgDb", AnnotationSpecies)) # Filter annotation of interest
if (length(ahQuery) == 1) {
DBName <- names(ahQuery)
} else if (length(ahQuery) > 1) {
DBName <- names(ahQuery)[1]
} else {
print("You don't have a valid DB")
rmarkdown::render()
}
AnnoDb <- ah[[DBName]] # Store into an OrgDb object
# Explore your OrgDb object with following accessors:
# columns(AnnpDb)
# keytypes(AnnoDb)
# keys(AnnoDb, keytype=..)
# select(AnnoDb, keys=.., columns=.., keytype=...)
AnnoKey <- keys(AnnoDb, keytype="ENSEMBLTRANS")
# Note: Annotation has to be done with not genome but transcripts
AnnoDb <- select(AnnoDb,
AnnoKey,
keytype="ENSEMBLTRANS",
columns="ENSEMBL")
# Check if your AnnoDb has been extracted and saved correctely
class(AnnoDb)
## [1] "data.frame"
head(AnnoDb) # The column 1 has to assign transcript (e.g. ENSEMBLTRANS)
## ENSEMBLTRANS ENSEMBL
## 1 ENSMUST00000030010 ENSMUSG00000015243
## 2 ENSMUST00000149127 ENSMUSG00000015243
## 3 ENSMUST00000033695 ENSMUSG00000031333
## 4 ENSMUST00000140333 ENSMUSG00000031333
## 5 ENSMUST00000236391 ENSMUSG00000024030
## 6 ENSMUST00000024829 ENSMUSG00000024030
featureCounts parameter setting
# "mm10", "mm9", "hg38", or "hg19"
# cf. mm10 = GRCm38
annot.inbuilt <- "mm10"
# GTF file path
annot.ext <- "../mouse_reference/gencode.m25.primary_assembly.annotation.gtf"
# annotation type:
# e.g.: "gene_id", "transcript_id", or "gene_name"
GTF.attrType <- "gene_id"
# Number of cores
nthread <- 16
# Set a function importing counts from BAM files with featureCounts()
fcounts.fn <- function(vec) {
fc <- featureCounts(files=vec, # a vector assigning BAM file paths
annot.inbuilt=annot.inbuilt,
annot.ext=annot.ext,
GTF.attrType=GTF.attrType,
isGTFAnnotationFile=T,
nthread=nthread,
isPairedEnd=F, # Set this parameter correctly
verbose=T)
return(fc$counts)
}
Importing counts
Importing Salmon counts
Note:
txi1 <- tximport(…, txOut=F)
txi2 <- tximport(…, txOut=T)
txi2 <- summarizedToGene(…)
counts extracted from txi1 and txi2 are the same
# Import gene level summarized counts
salmon.txi <- tximport(metadata$Salmon_path,
type = "salmon",
tx2gene=AnnoDb,
ignoreTxVersion=T,
txOut=F) # TRUE for transcript level, FALSE for gene level
# Extract the counts and save as a data frame
salmon.counts <- salmon.txi$counts
# Explore the salmon count data frame
head(salmon.counts)
## [,1] [,2] [,3] [,4] [,5] [,6]
## ENSMUSG00000000001 6492 5822.000 5614 6040.000 6142.000 5437.000
## ENSMUSG00000000056 1637 1458.000 1395 1579.000 1608.999 1398.000
## ENSMUSG00000000058 5 5.000 4 3.000 1.000 3.000
## ENSMUSG00000000078 3344 2807.000 2828 3589.000 3731.000 2912.000
## ENSMUSG00000000088 4558 4149.929 3958 4189.921 4151.000 3623.928
## ENSMUSG00000000093 10 8.000 8 7.000 4.000 4.000
dim(salmon.counts)
## [1] 15022 6
summary(salmon.counts)
## V1 V2 V3 V4
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 0.0 Median : 0.0 Median : 0.0 Median : 0.0
## Mean : 675.0 Mean : 603.9 Mean : 585.8 Mean : 613.8
## 3rd Qu.: 29.3 3rd Qu.: 27.0 3rd Qu.: 25.0 3rd Qu.: 28.0
## Max. :357534.3 Max. :322612.0 Max. :318293.5 Max. :343971.6
## V5 V6
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.: 0.00
## Median : 0.0 Median : 0.00
## Mean : 618.5 Mean : 531.85
## 3rd Qu.: 27.0 3rd Qu.: 22.94
## Max. :337557.2 Max. :305036.86
Importing STAR counts
# Extract counts by running featureCounts()
star.counts <- fcounts.fn(metadata$STAR_path)
##
## ========== _____ _ _ ____ _____ ______ _____
## ===== / ____| | | | _ \| __ \| ____| /\ | __ \
## ===== | (___ | | | | |_) | |__) | |__ / \ | | | |
## ==== \___ \| | | | _ <| _ /| __| / /\ \ | | | |
## ==== ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
## ========== |_____/ \____/|____/|_| \_\______/_/ \_\_____/
## Rsubread 2.4.0
##
## //========================== featureCounts setting ===========================\\
## || ||
## || Input files : 6 BAM files ||
## || ||
## || WT-rep1Aligned.sortedByCoord.out.bam ||
## || WT-rep2Aligned.sortedByCoord.out.bam ||
## || WT-rep3Aligned.sortedByCoord.out.bam ||
## || cKO-rep1Aligned.sortedByCoord.out.bam ||
## || cKO-rep2Aligned.sortedByCoord.out.bam ||
## || cKO-rep3Aligned.sortedByCoord.out.bam ||
## || ||
## || Paired-end : no ||
## || Count read pairs : no ||
## || Annotation : gencode.m25.primary_assembly.annotation.gtf ... ||
## || Dir for temp files : . ||
## || Threads : 16 ||
## || Level : meta-feature level ||
## || Multimapping reads : counted ||
## || Multi-overlapping reads : not counted ||
## || Min overlapping bases : 1 ||
## || ||
## \\============================================================================//
##
## //================================= Running ==================================\\
## || ||
## || Load annotation file gencode.m25.primary_assembly.annotation.gtf ... ||
## || Features : 843712 ||
## || Meta-features : 55487 ||
## || Chromosomes/contigs : 45 ||
## || ||
## || Process BAM file WT-rep1Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 70746461 ||
## || Successfully assigned alignments : 55979059 (79.1%) ||
## || Running time : 0.07 minutes ||
## || ||
## || Process BAM file WT-rep2Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 63394034 ||
## || Successfully assigned alignments : 50159175 (79.1%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file WT-rep3Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 61430033 ||
## || Successfully assigned alignments : 48522991 (79.0%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file cKO-rep1Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 64533984 ||
## || Successfully assigned alignments : 51163980 (79.3%) ||
## || Running time : 0.07 minutes ||
## || ||
## || Process BAM file cKO-rep2Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 65134349 ||
## || Successfully assigned alignments : 51790758 (79.5%) ||
## || Running time : 0.07 minutes ||
## || ||
## || Process BAM file cKO-rep3Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 56200709 ||
## || Successfully assigned alignments : 44128747 (78.5%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Write the final count table. ||
## || Write the read assignment summary. ||
## || ||
## \\============================================================================//
# Explore the STAR count data frame
head(star.counts)
## WT-rep1Aligned.sortedByCoord.out.bam
## ENSMUSG00000102693.1 0
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 0
## ENSMUSG00000103377.1 1
## ENSMUSG00000104017.1 0
## WT-rep2Aligned.sortedByCoord.out.bam
## ENSMUSG00000102693.1 0
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 0
## ENSMUSG00000103377.1 2
## ENSMUSG00000104017.1 0
## WT-rep3Aligned.sortedByCoord.out.bam
## ENSMUSG00000102693.1 0
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 0
## ENSMUSG00000103377.1 0
## ENSMUSG00000104017.1 2
## cKO-rep1Aligned.sortedByCoord.out.bam
## ENSMUSG00000102693.1 0
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 0
## ENSMUSG00000103377.1 0
## ENSMUSG00000104017.1 1
## cKO-rep2Aligned.sortedByCoord.out.bam
## ENSMUSG00000102693.1 0
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 0
## ENSMUSG00000103377.1 0
## ENSMUSG00000104017.1 0
## cKO-rep3Aligned.sortedByCoord.out.bam
## ENSMUSG00000102693.1 1
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 1
## ENSMUSG00000103377.1 0
## ENSMUSG00000104017.1 0
dim(star.counts)
## [1] 55487 6
summary(star.counts)
## WT-rep1Aligned.sortedByCoord.out.bam WT-rep2Aligned.sortedByCoord.out.bam
## Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 0
## Median : 1 Median : 1
## Mean : 1009 Mean : 904
## 3rd Qu.: 86 3rd Qu.: 78
## Max. :811870 Max. :470203
## WT-rep3Aligned.sortedByCoord.out.bam cKO-rep1Aligned.sortedByCoord.out.bam
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 1.0 Median : 1.0
## Mean : 874.5 Mean : 922.1
## 3rd Qu.: 77.0 3rd Qu.: 82.0
## Max. :388953.0 Max. :459595.0
## cKO-rep2Aligned.sortedByCoord.out.bam cKO-rep3Aligned.sortedByCoord.out.bam
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 1.0 Median : 1.0
## Mean : 933.4 Mean : 795.3
## 3rd Qu.: 81.0 3rd Qu.: 70.0
## Max. :412338.0 Max. :390466.0
Importing HISAT2 counts
# Extract counts by running featureCounts()
hisat2.counts <- fcounts.fn(metadata$HISAT2_path)
##
## ========== _____ _ _ ____ _____ ______ _____
## ===== / ____| | | | _ \| __ \| ____| /\ | __ \
## ===== | (___ | | | | |_) | |__) | |__ / \ | | | |
## ==== \___ \| | | | _ <| _ /| __| / /\ \ | | | |
## ==== ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
## ========== |_____/ \____/|____/|_| \_\______/_/ \_\_____/
## Rsubread 2.4.0
##
## //========================== featureCounts setting ===========================\\
## || ||
## || Input files : 6 BAM files ||
## || ||
## || WT-rep1.sorted.bam ||
## || WT-rep2.sorted.bam ||
## || WT-rep3.sorted.bam ||
## || cKO-rep1.sorted.bam ||
## || cKO-rep2.sorted.bam ||
## || cKO-rep3.sorted.bam ||
## || ||
## || Paired-end : no ||
## || Count read pairs : no ||
## || Annotation : gencode.m25.primary_assembly.annotation.gtf ... ||
## || Dir for temp files : . ||
## || Threads : 16 ||
## || Level : meta-feature level ||
## || Multimapping reads : counted ||
## || Multi-overlapping reads : not counted ||
## || Min overlapping bases : 1 ||
## || ||
## \\============================================================================//
##
## //================================= Running ==================================\\
## || ||
## || Load annotation file gencode.m25.primary_assembly.annotation.gtf ... ||
## || Features : 843712 ||
## || Meta-features : 55487 ||
## || Chromosomes/contigs : 45 ||
## || ||
## || Process BAM file WT-rep1.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 63363619 ||
## || Successfully assigned alignments : 48324485 (76.3%) ||
## || Running time : 0.07 minutes ||
## || ||
## || Process BAM file WT-rep2.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 56580014 ||
## || Successfully assigned alignments : 43217333 (76.4%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file WT-rep3.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 54969075 ||
## || Successfully assigned alignments : 41849987 (76.1%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file cKO-rep1.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 57670244 ||
## || Successfully assigned alignments : 44111169 (76.5%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file cKO-rep2.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 58054098 ||
## || Successfully assigned alignments : 44514481 (76.7%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file cKO-rep3.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || GL456359.1 ||
## || GL456393.1 ||
## || GL456389.1 ||
## || JH584300.1 ||
## || GL456368.1 ||
## || GL456394.1 ||
## || GL456360.1 ||
## || JH584301.1 ||
## || GL456390.1 ||
## || GL456213.1 ||
## || GL456382.1 ||
## || GL456378.1 ||
## || JH584302.1 ||
## || GL456387.1 ||
## || GL456370.1 ||
## || GL456366.1 ||
## || GL456383.1 ||
## || GL456379.1 ||
## || GL456396.1 ||
## || GL456392.1 ||
## || GL456367.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 50205194 ||
## || Successfully assigned alignments : 38062143 (75.8%) ||
## || Running time : 0.05 minutes ||
## || ||
## || Write the final count table. ||
## || Write the read assignment summary. ||
## || ||
## \\============================================================================//
# Explore the HISAT2 count data frame
head(hisat2.counts)
## WT-rep1.sorted.bam WT-rep2.sorted.bam WT-rep3.sorted.bam
## ENSMUSG00000102693.1 0 0 0
## ENSMUSG00000064842.1 0 0 0
## ENSMUSG00000051951.5 0 0 0
## ENSMUSG00000102851.1 0 0 0
## ENSMUSG00000103377.1 1 2 0
## ENSMUSG00000104017.1 0 0 2
## cKO-rep1.sorted.bam cKO-rep2.sorted.bam
## ENSMUSG00000102693.1 0 0
## ENSMUSG00000064842.1 0 0
## ENSMUSG00000051951.5 0 0
## ENSMUSG00000102851.1 0 0
## ENSMUSG00000103377.1 0 0
## ENSMUSG00000104017.1 1 0
## cKO-rep3.sorted.bam
## ENSMUSG00000102693.1 1
## ENSMUSG00000064842.1 0
## ENSMUSG00000051951.5 0
## ENSMUSG00000102851.1 1
## ENSMUSG00000103377.1 0
## ENSMUSG00000104017.1 0
dim(hisat2.counts)
## [1] 55487 6
summary(hisat2.counts)
## WT-rep1.sorted.bam WT-rep2.sorted.bam WT-rep3.sorted.bam cKO-rep1.sorted.bam
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 1.0 Median : 1.0 Median : 1.0 Median : 1.0
## Mean : 870.9 Mean : 778.9 Mean : 754.2 Mean : 795.0
## 3rd Qu.: 85.0 3rd Qu.: 77.0 3rd Qu.: 76.0 3rd Qu.: 81.5
## Max. :755391.0 Max. :436946.0 Max. :361243.0 Max. :428302.0
## cKO-rep2.sorted.bam cKO-rep3.sorted.bam
## Min. : 0.0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 0
## Median : 1.0 Median : 1
## Mean : 802.3 Mean : 686
## 3rd Qu.: 81.0 3rd Qu.: 69
## Max. :383887.0 Max. :363272
Data cleaning: sample and gene annotation
countList <- list(salmon.counts,
star.counts,
hisat2.counts)
# Assign names of the count data frames in the count list
names(countList) <- Aligners
# Set a function cleaning the count data frame
clean.fn <- function(df) {
# Convert to a data frame
df <- as.data.frame(df)
# Assign column names
names(df) <- SampleNames
# Bring row names to a column
df <- df %>% rownames_to_column(var="GENEID")
return(df)
}
# Set a function to drop GENEID version
clean.annotation.fn <- function(df) {
# Re-annotate without version specification
df <- separate(df, "GENEID", c("GENEID", "Version"))
# Remove version column
df <- df[, colnames(df) != "Version"]
return(df)
}
# Move GENEID to a column
for (x in Aligners) {
countList[[x]] <- clean.fn(countList[[x]])
}
# Remove version of GENEID and duplicated rows in STAR & HISAT2 count tables
for (x in Aligners) {
countList[[x]] <- clean.annotation.fn(countList[[x]]) %>%
distinct()
}
# Explore the cleaned count data frames
head(countList[[1]])
## GENEID WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2 cKO-rep3
## 1 ENSMUSG00000000001 6492 5822.000 5614 6040.000 6142.000 5437.000
## 2 ENSMUSG00000000056 1637 1458.000 1395 1579.000 1608.999 1398.000
## 3 ENSMUSG00000000058 5 5.000 4 3.000 1.000 3.000
## 4 ENSMUSG00000000078 3344 2807.000 2828 3589.000 3731.000 2912.000
## 5 ENSMUSG00000000088 4558 4149.929 3958 4189.921 4151.000 3623.928
## 6 ENSMUSG00000000093 10 8.000 8 7.000 4.000 4.000
head(countList[[2]])
## GENEID WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2 cKO-rep3
## 1 ENSMUSG00000102693 0 0 0 0 0 1
## 2 ENSMUSG00000064842 0 0 0 0 0 0
## 3 ENSMUSG00000051951 0 0 0 0 0 0
## 4 ENSMUSG00000102851 0 0 0 0 0 1
## 5 ENSMUSG00000103377 1 2 0 0 0 0
## 6 ENSMUSG00000104017 0 0 2 1 0 0
head(countList[[3]])
## GENEID WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2 cKO-rep3
## 1 ENSMUSG00000102693 0 0 0 0 0 1
## 2 ENSMUSG00000064842 0 0 0 0 0 0
## 3 ENSMUSG00000051951 0 0 0 0 0 0
## 4 ENSMUSG00000102851 0 0 0 0 0 1
## 5 ENSMUSG00000103377 1 2 0 0 0 0
## 6 ENSMUSG00000104017 0 0 2 1 0 0
dim(countList[[1]])
## [1] 15022 7
dim(countList[[2]])
## [1] 55487 7
dim(countList[[3]])
## [1] 55487 7
sum(duplicated(countList[[1]]))
## [1] 0
sum(duplicated(countList[[2]]))
## [1] 0
sum(duplicated(countList[[3]]))
## [1] 0
# Convert Salmon counts to integers
countList[["Salmon"]] <- cbind(GENEID=countList[["Salmon"]][, "GENEID"],
round(countList[["Salmon"]][,
colnames(countList[["Salmon"]]) %in% SampleNames]))
# Explore the cleaned count data frames
head(countList[[1]])
## GENEID WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2 cKO-rep3
## 1 ENSMUSG00000000001 6492 5822 5614 6040 6142 5437
## 2 ENSMUSG00000000056 1637 1458 1395 1579 1609 1398
## 3 ENSMUSG00000000058 5 5 4 3 1 3
## 4 ENSMUSG00000000078 3344 2807 2828 3589 3731 2912
## 5 ENSMUSG00000000088 4558 4150 3958 4190 4151 3624
## 6 ENSMUSG00000000093 10 8 8 7 4 4
Plotting sequencing depth
Number of total counts per sample
# Set a function generating a data frame with sequencing depth
seq.depth.fn <- function(df, aligner) {
seqdf <- as.data.frame(colSums(df[, SampleNames])) %>%
rownames_to_column (var="Sample") %>%
mutate(Aligner=aligner)
names(seqdf) <- c("Sample", "Count", "Aligner")
return(seqdf)
}
# Set a function for a bar plot comparing values
comparing.barplot.fn <- function(df, yval, title, ytitle) {
ggplot(df,
aes(x=Sample, y=yval, group=Aligner, fill=Aligner)) +
geom_bar(stat="identity", position="dodge") +
theme_bw() +
ggtitle(title) +
ylab(ytitle)
}
# Initialize the seq depth data frame with the first aligner
seq.depth.df <- seq.depth.fn(countList[[1]], Aligners[1])
# Extend the seq depth data frame with the rest of aligners
for (x in Aligners) {
if (x %in% Aligners[2:length(Aligners)]) {
seq.depth.df <- rbind(seq.depth.df,
seq.depth.fn(countList[[x]], x))
}
}
# Explore how the data frame
print(seq.depth.df)
## Sample Count Aligner
## 1 WT-rep1 10139658 Salmon
## 2 WT-rep2 9071265 Salmon
## 3 WT-rep3 8800358 Salmon
## 4 cKO-rep1 9219918 Salmon
## 5 cKO-rep2 9291690 Salmon
## 6 cKO-rep3 7989400 Salmon
## 7 WT-rep1 55979059 STAR
## 8 WT-rep2 50159175 STAR
## 9 WT-rep3 48522991 STAR
## 10 cKO-rep1 51163980 STAR
## 11 cKO-rep2 51790758 STAR
## 12 cKO-rep3 44128747 STAR
## 13 WT-rep1 48324485 HISAT2
## 14 WT-rep2 43217333 HISAT2
## 15 WT-rep3 41849987 HISAT2
## 16 cKO-rep1 44111169 HISAT2
## 17 cKO-rep2 44514481 HISAT2
## 18 cKO-rep3 38062143 HISAT2
summary(seq.depth.df)
## Sample Count Aligner
## Length:18 Min. : 7989400 Length:18
## Class :character 1st Qu.: 9503682 Class :character
## Mode :character Median :43664251 Mode :character
## Mean :34240922
## 3rd Qu.:48473364
## Max. :55979059
# Convert character vectors to factors
seq.depth.df$Sample <- factor(seq.depth.df$Sample,
levels=SampleNames)
seq.depth.df$Aligner <- factor(seq.depth.df$Aligner,
levels=Aligners)
# Create a plot presenting sequencing depth
comparing.barplot.fn(seq.depth.df,
seq.depth.df$Count,
"Sequencing Depth by Sample and Aligner",
"Count")

Generating DESeq2 objects
- vst() was run to perform variance stabilizing transformation instead of rlog() which takes longer time with similar characteristics.
- The vsd object created by vst() is used for not DE analysis but QC.
Estimating size factors
- black dashed line: size factor = 1
# Calculate and add size factors to the DEseq object
for (x in Aligners) {
ddsList[[x]] <- estimateSizeFactors(ddsList[[x]])
}
# Set a function summarizing size factors by aligner to a data frame
sfactor.fn <- function(df, aligner) {
sizefactor <- as.data.frame(round(sizeFactors(df), 3)) %>%
rownames_to_column(var="Sample") %>%
mutate(Aligner=aligner)
names(sizefactor) <- c("Sample", "Size_Factor", "Aligner")
return(sizefactor)
}
# Initialize a data frame with the first aligner
size.factor.df <- sfactor.fn(ddsList[[1]], Aligners[1])
for (x in Aligners) {
if (x != Aligners[1]) {
size.factor.df <- rbind(size.factor.df,
sfactor.fn(ddsList[[x]], x))
}
}
# Explore the data frame
print(size.factor.df)
## Sample Size_Factor Aligner
## 1 WT-rep1 1.115 Salmon
## 2 WT-rep2 1.000 Salmon
## 3 WT-rep3 0.975 Salmon
## 4 cKO-rep1 1.022 Salmon
## 5 cKO-rep2 1.040 Salmon
## 6 cKO-rep3 0.890 Salmon
## 7 WT-rep1 1.107 STAR
## 8 WT-rep2 1.000 STAR
## 9 WT-rep3 0.977 STAR
## 10 cKO-rep1 1.019 STAR
## 11 cKO-rep2 1.038 STAR
## 12 cKO-rep3 0.891 STAR
## 13 WT-rep1 1.107 HISAT2
## 14 WT-rep2 1.000 HISAT2
## 15 WT-rep3 0.976 HISAT2
## 16 cKO-rep1 1.019 HISAT2
## 17 cKO-rep2 1.038 HISAT2
## 18 cKO-rep3 0.891 HISAT2
# Convert character vectors to factors
size.factor.df$Sample <- factor(size.factor.df$Sample,
levels=SampleNames)
size.factor.df$Aligner <- factor(size.factor.df$Aligner,
levels=Aligners)
# Plot calculated size factors
comparing.barplot.fn(size.factor.df,
size.factor.df$Size_Factor,
"Size Factors by Aligner and Sample",
"Size Factor") + geom_hline(yintercept=1, linetype="dashed", color="black", size=1)

Estimating dispersion and Wald test
- Dispersion is calculated as a measure of variation instead of variance since variance gets larger when gene expression gets higher.
- Wald test is the default setting of DESeq2 which tests null hypothesis between two groups. You should use Likelihood ratio test (LRT) when comparing more than two groups.
Sample QC: Principal Component Analysis (PCA)
Identifies source of variation and sample outliers
# Assigne what to compare
GroupOfInterest <- Contrast[1]
# Set a function for sample pca
qcpca.fn <- function(obj, title) {
plotPCA(obj,
intgroup=GroupOfInterest,
returnData=FALSE) + theme_bw() + ggtitle(paste("PCA:", title))
}
# Print the plots
qcpca.fn(vsdList[[1]], Aligners[1])

qcpca.fn(vsdList[[2]], Aligners[2])

qcpca.fn(vsdList[[3]], Aligners[3])

Sample QC: Sample Correlation Heatmap
Identifies distance between samples & correlation in a group
# Heatmap annotation
HeatmapAnno <- metadata[, c("Sample", "Group")]
# Set a function generating a correlation heatmap
cheatmap.fn <- function(df, title) {
# Extract a normalized count matrix
vm <- assay(df)
# Generate a correlation matrix
cm <- cor(vm)
# Generate a heatmap
pheatmap(cm,
annotation=HeatmapAnno,
main=paste("Sample Correlation Heatmap:", title))
}
# Print the heatmaps
cheatmap.fn(vsdList[[1]], Aligners[1])

cheatmap.fn(vsdList[[2]], Aligners[2])

cheatmap.fn(vsdList[[3]], Aligners[3])

Running DE analysis
# Run DESeq
for (x in Aligners) {
ddsList[[x]] <- DESeq(ddsList[[x]])
# Check result names
ResNames <- resultsNames(ddsList[[x]])
print(ResNames)
}
## [1] "Intercept" "Group_cKO_vs_WT"
## [1] "Intercept" "Group_cKO_vs_WT"
## [1] "Intercept" "Group_cKO_vs_WT"
Creating dispersion plots
- Dispersion is important since estimation by DESeq2 algorithm is based on the assumption that genes with similar expression levels have similar dispersion. If an RNA-seq dataset doesn’t satisfy this assumption, use other DE algorithms than DESeq2.
Exploring distribution of false discovery rate (FDR)
Black dashed line: FDR = 0.1
# Plot distribution of FDR
ggplot(lfc.dataframe,
aes(x=padj, y=..count.., color=Alignment)) +
geom_density(size=1) +
theme_bw() +
scale_x_log10() +
ggtitle("Distribution of False Discovery Rate (FDR) by Aligner") +
ylab("Count") +
xlim(0.00001, 1) +
geom_vline(xintercept=alpha,
color="black",
size=1, linetype="dashed") + scale_x_continuous(breaks=seq(0, 1, by=0.1))

Presenting distribution of log2FoldChange
Black: total genes (padj =/= NA)
Colored: genes above or below FDR=0.1
valid.lfc.df <- subset(lfc.dataframe, FDR == "< 0.1")
ggplot(valid.lfc.df,
aes(x=log2FoldChange,
y=..count..,
color=Alignment)) +
geom_density(size=1) +
theme_bw() +
geom_vline(xintercept=c(-1, 1),
linetype="dashed", color="black", size=1) +
ggtitle("Distribution of log2FoldChange Values by Aligner (FDR < 0.1)") +
ylab("Count") +
xlim(-10, 10) # Change xlim by datatype

Exploring mean-difference with an MA plot
- x-axis: expression level (baseMean))
- y-axis: fold change (log2FoldChange)
- Red dashed lines: log2FoldChange = -1 and 1
# Set ylim: has to adjusted by users depending on data
yl <- c(-10, 10)
# Set min log2 fold change of interest
mLog <- c(-1, 1)
# Create MA plots by Aligner
ggplot(lfc.dataframe, aes(x=baseMean, y=log2FoldChange, color=FDR)) +
geom_point() +
facet_grid(~Alignment) +
scale_x_log10() +
theme_bw() +
scale_color_manual(values=c("blue", "grey")) +
ggtitle(paste("MA plot")) +
ylim(yl[1], yl[2]) +
theme(strip.text.x=element_text(size=10)) +
geom_hline(yintercept=c(mLog[1], mLog[2]),
linetype="dashed", color="red")

Exploring expression profiling with normalized count data
- Normalized count matrices are extracted from dds objects and filtered with thresholds set at FDR and log2FoldChange
- The heatmaps display z-scores of the normalized counts
- In this analysis, mLog = 1
# Initialize a list
heatmap.df.List <- lfcList
# Filter genes with FDR < alpha and absolute log2FoldChange > 1
for (x in Aligners) {
# Set a logical vector filtering FDR below alpha
is.fdr.valid <- lfcList[[x]]$FDR == paste("<", alpha)
# Set a logical vector filtering absolute lfc above 1
is.lfc.large <- abs(lfcList[[x]]$log2FoldChange) > mLog[2]
# Extract total normalized counts
norm.counts <- counts(ddsList[[x]], normalized=T)
# Save filtered genes only from the normalized count data
heatmap.df.List[[x]] <- norm.counts[is.fdr.valid & is.lfc.large,]
}
# Explore the cleaned data frames
head(heatmap.df.List[[1]])
## WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2 cKO-rep3
## ENSMUSG00000000125 17.04733 17.00055 12.30765 19.56256 80.76408 48.31321
## ENSMUSG00000001020 75.36716 90.00293 124.10216 33.25635 33.65170 40.44827
## ENSMUSG00000001025 493.47547 611.01986 500.51120 244.53199 184.60362 214.60052
## ENSMUSG00000002228 200.08187 307.00998 338.46044 150.63171 91.34033 142.69249
## ENSMUSG00000007682 50.24478 50.00163 31.79477 88.03152 143.26010 80.89653
## ENSMUSG00000015533 64.60043 87.00283 93.33303 49.88453 22.11398 43.81895
head(heatmap.df.List[[2]])
## WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2
## ENSMUSG00000025929 151.77009 57.97408 16.37872 7.848327 7.705361
## ENSMUSG00000041872 131.89543 62.97184 16.37872 14.715613 14.447553
## ENSMUSG00000047180 1469.82104 1545.30903 1586.68816 626.885122 470.027043
## ENSMUSG00000026072 49.68664 46.97899 57.32551 10.791450 5.779021
## ENSMUSG00000026070 542.93943 570.74480 901.85308 417.923415 242.718883
## ENSMUSG00000025969 33.42556 36.98346 30.71009 60.824535 78.016784
## cKO-rep3
## ENSMUSG00000025929 5.611470
## ENSMUSG00000041872 8.978352
## ENSMUSG00000047180 622.873185
## ENSMUSG00000026072 14.589822
## ENSMUSG00000026070 253.638450
## ENSMUSG00000025969 65.093054
head(heatmap.df.List[[3]])
## WT-rep1 WT-rep2 WT-rep3 cKO-rep1 cKO-rep2
## ENSMUSG00000025929 150.82592 57.99044 16.38913 7.853379 7.708966
## ENSMUSG00000041872 127.34404 58.99027 12.29185 13.743413 15.417932
## ENSMUSG00000047180 1458.58602 1537.74640 1576.42924 623.361945 468.319699
## ENSMUSG00000026072 49.67321 45.99242 57.36195 10.798396 5.781725
## ENSMUSG00000026070 525.63286 555.90832 872.72106 409.357372 235.123470
## ENSMUSG00000025969 34.31967 37.99373 32.77826 57.918669 82.871387
## cKO-rep3
## ENSMUSG00000025929 5.611066
## ENSMUSG00000041872 8.977705
## ENSMUSG00000047180 617.217229
## ENSMUSG00000026072 14.588771
## ENSMUSG00000026070 246.886892
## ENSMUSG00000025969 70.699428
dim(heatmap.df.List[[1]])
## [1] 74 6
dim(heatmap.df.List[[2]])
## [1] 236 6
dim(heatmap.df.List[[3]])
## [1] 240 6
pheatmap(heatmap.df.List[[3]],
annotation=HeatmapAnno,
scale="row",
show_rownames=F)

# Set a function creating a profiling heatmap
profile.heatmap.fn <- function(df, title) {
pheatmap(df,
annotation=HeatmapAnno,
scale="row",
show_rownames=F,
main=paste("Expression Profiling by", title, "(FDR < 0.1, absolute log2FoldChange > 1)"))
}
# Print the expression heatmaps
profile.heatmap.fn(heatmap.df.List[[Aligners[1]]], Aligners[1])

profile.heatmap.fn(heatmap.df.List[[Aligners[2]]], Aligners[2])

profile.heatmap.fn(heatmap.df.List[[Aligners[3]]], Aligners[3])

NA statistics: zero count genes & outlier genes
When NAs appear in
- log2FoldChange: zero counts in all samples
- padj: too little information
- pval & padj: at least one replicate was an outlier
# Count number of NA genes
type=c("Zero Counts", "Outliers", "Total NA Genes")
# Create a data frame storing number of NA genes by type
NA.genes <- lfc.dataframe %>%
group_by(Alignment) %>%
summarize(zero=sum(is.na(log2FoldChange)),
outlier=sum(is.na(pvalue) & is.na(padj))) %>%
mutate(total=zero + outlier) %>%
gather(Type, Number, -Alignment) %>%
mutate(Type=factor(case_when(Type == "zero" ~ type[1],
Type == "outlier" ~ type[2],
Type == "total" ~ type[3]),
levels=type))
# Plot number of NA genes
ggplot(NA.genes,
aes(x=Type, y=Number, group=Alignment, fill=Alignment, label=Number)) +
geom_bar(stat="identity", position="dodge") +
theme_bw() +
geom_text(position=position_dodge(width=1), vjust=1.5) +
ggtitle("Number of NA Genes") +
ylab("Number of Genes")

Ranking DEGs by alginer
- fdr.rank: ranked by FDR
- lfc.rank: ranked by absolute fold change
- up.lfc.rank: ranked by magnitude of fold change increase
- down.lfc.rank: ranked by manitude of fold change decrease
# Create a new list having DE table with FDR below alpha
fdr.rank <- lfcList
lfc.rank <- lfcList
up.lfc.rank <- lfcList
down.lfc.rank <- lfcList
# Set a sorting genes with FDR below alpha
filter.fdr.fn <- function(df) {as.data.table(df[df$FDR == paste("<", alpha),])}
# Set a function creating a column assigning ranking
Ranking.fn <- function(x) {mutate(x, Rank=1:nrow(x))}
for (x in Aligners) {
rdf <- lfcList[[x]]
fdr.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(padj) %>% Ranking.fn()
lfc.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(desc(abs(log2FoldChange))) %>% Ranking.fn()
up.lfc.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(desc(log2FoldChange)) %>% Ranking.fn()
down.lfc.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(log2FoldChange) %>% Ranking.fn()
}
# Explore the ranking outputs
head(fdr.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSMUSG00000022018 645.5261 1.348896 0.1098847 12.27556 1.225431e-34
## 2: ENSMUSG00000018008 1210.5052 1.404828 0.1188426 11.82092 3.043495e-32
## 3: ENSMUSG00000047180 1006.8911 1.439987 0.1222790 11.77625 5.174627e-32
## 4: ENSMUSG00000032011 15998.0860 1.879407 0.1687060 11.14013 8.000394e-29
## 5: ENSMUSG00000020689 2312.2504 1.536737 0.1398468 10.98872 4.330317e-28
## 6: ENSMUSG00000040498 715.1294 2.226779 0.2078944 10.71111 9.027428e-27
## padj FDR Alignment Rank
## 1: 5.340430e-31 < 0.1 Salmon 1
## 2: 6.631776e-29 < 0.1 Salmon 2
## 3: 7.517008e-29 < 0.1 Salmon 3
## 4: 8.716430e-26 < 0.1 Salmon 4
## 5: 3.774304e-25 < 0.1 Salmon 5
## 6: 6.556922e-24 < 0.1 Salmon 6
head(lfc.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSMUSG00000025929 39.59839 3.516588 0.7223282 4.868407 1.125012e-06
## 2: ENSMUSG00000061769 12.35596 -3.303917 0.9208850 -3.587762 3.335282e-04
## 3: ENSMUSG00000043932 15.02868 -3.218227 0.8504086 -3.784330 1.541231e-04
## 4: ENSMUSG00000037129 21.70641 2.999024 0.6362719 4.713432 2.435792e-06
## 5: ENSMUSG00000040653 10.25877 2.971537 0.8909262 3.335335 8.519683e-04
## 6: ENSMUSG00000041872 36.40167 2.642199 0.7245405 3.646724 2.656053e-04
## padj FDR Alignment Rank
## 1: 0.0001114273 < 0.1 Salmon 1
## 2: 0.0096901061 < 0.1 Salmon 2
## 3: 0.0055305321 < 0.1 Salmon 3
## 4: 0.0002166363 < 0.1 Salmon 4
## 5: 0.0197493512 < 0.1 Salmon 5
## 6: 0.0083877396 < 0.1 Salmon 6
head(up.lfc.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSMUSG00000025929 39.59839 3.516588 0.7223282 4.868407 1.125012e-06
## 2: ENSMUSG00000037129 21.70641 2.999024 0.6362719 4.713432 2.435792e-06
## 3: ENSMUSG00000040653 10.25877 2.971537 0.8909262 3.335335 8.519683e-04
## 4: ENSMUSG00000041872 36.40167 2.642199 0.7245405 3.646724 2.656053e-04
## 5: ENSMUSG00000030217 25.85230 2.506255 0.5146045 4.870255 1.114545e-06
## 6: ENSMUSG00000070368 37.05782 2.393616 0.5450880 4.391246 1.127028e-05
## padj FDR Alignment Rank
## 1: 0.0001114273 < 0.1 Salmon 1
## 2: 0.0002166363 < 0.1 Salmon 2
## 3: 0.0197493512 < 0.1 Salmon 3
## 4: 0.0083877396 < 0.1 Salmon 4
## 5: 0.0001114273 < 0.1 Salmon 5
## 6: 0.0007674360 < 0.1 Salmon 6
head(down.lfc.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSMUSG00000061769 12.35596 -3.303917 0.9208850 -3.587762 3.335282e-04
## 2: ENSMUSG00000043932 15.02868 -3.218227 0.8504086 -3.784330 1.541231e-04
## 3: ENSMUSG00000082901 12.01047 -2.408667 0.7691315 -3.131671 1.738145e-03
## 4: ENSMUSG00000051726 15.39281 -2.214000 0.5950714 -3.720562 1.987797e-04
## 5: ENSMUSG00000026241 17.91325 -2.213291 0.6521051 -3.394071 6.886185e-04
## 6: ENSMUSG00000090691 34.44154 -2.059095 0.4737681 -4.346209 1.385107e-05
## padj FDR Alignment Rank
## 1: 0.0096901061 < 0.1 Salmon 1
## 2: 0.0055305321 < 0.1 Salmon 2
## 3: 0.0330778872 < 0.1 Salmon 3
## 4: 0.0067577830 < 0.1 Salmon 4
## 5: 0.0169547993 < 0.1 Salmon 5
## 6: 0.0009286611 < 0.1 Salmon 6
Calculating rank difference between STAR and HISAT2
Salmon was excluded due to extremely small number of genes found
# Set a function rebuilding DE tables with gene ranks
rankdiff.fn <- function(List){
# Select columns and join the data frames by gene
full_join(List[[Aligners[2]]][, .(GENEID, Alignment, Rank, baseMean)],
List[[Aligners[3]]][, .(GENEID, Alignment, Rank, baseMean)],
by="GENEID") %>%
# Add columns assining gene expression levels, rank differences, and mean ranks
mutate(logMeanExpression=log(baseMean.x+baseMean.y/2),
RankDiff=Rank.x-Rank.y,
MeanRank=(Rank.x+Rank.y)/2)
}
# Calculate rank difference by ranking type
fdr.rankdiff <- rankdiff.fn(fdr.rank)
lfc.rankdiff <- rankdiff.fn(lfc.rank)
up.lfc.rankdiff <- rankdiff.fn(up.lfc.rank)
down.lfc.rankdiff <- rankdiff.fn(down.lfc.rank)
# Explore the calculated rank differences
head(fdr.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000026463 STAR 1 1562.6504 HISAT2 1
## 2: ENSMUSG00000022018 STAR 2 686.0206 HISAT2 2
## 3: ENSMUSG00000018008 STAR 3 1295.3734 HISAT2 3
## 4: ENSMUSG00000047180 STAR 4 1053.6006 HISAT2 4
## 5: ENSMUSG00000020689 STAR 5 2430.9495 HISAT2 5
## 6: ENSMUSG00000040498 STAR 6 763.7041 HISAT2 41
## baseMean.y logMeanExpression RankDiff MeanRank
## 1: 1537.4785 7.754220 0 1.0
## 2: 667.0280 6.927102 0 2.0
## 3: 1252.9425 7.561041 0 3.0
## 4: 1046.9434 7.363325 0 4.0
## 5: 2385.4226 8.195240 0 5.0
## 6: 743.7298 7.034889 -35 23.5
head(lfc.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000103587 STAR 1 42.32553 HISAT2 1
## 2: ENSMUSG00000020713 STAR 2 11.98750 <NA> NA
## 3: ENSMUSG00000017002 STAR 3 12.81535 HISAT2 2
## 4: ENSMUSG00000061769 STAR 4 14.59555 HISAT2 3
## 5: ENSMUSG00000025929 STAR 5 41.21467 HISAT2 4
## 6: ENSMUSG00000043932 STAR 6 16.59888 HISAT2 5
## baseMean.y logMeanExpression RankDiff MeanRank
## 1: 41.48362 4.144203 0 1.0
## 2: NA NA NA NA
## 3: 12.65005 2.951800 1 2.5
## 4: 14.27835 3.078911 1 3.5
## 5: 41.06315 4.123033 1 4.5
## 6: 16.44317 3.211669 1 5.5
head(up.lfc.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000017002 STAR 1 12.81535 HISAT2 1
## 2: ENSMUSG00000025929 STAR 2 41.21467 HISAT2 2
## 3: ENSMUSG00000017897 STAR 3 140.16560 HISAT2 5
## 4: ENSMUSG00000037129 STAR 4 22.03950 HISAT2 3
## 5: ENSMUSG00000034115 STAR 5 14.97109 HISAT2 4
## 6: ENSMUSG00000040653 STAR 6 11.92657 HISAT2 7
## baseMean.y logMeanExpression RankDiff MeanRank
## 1: 12.65005 2.951800 0 1.0
## 2: 41.06315 4.123033 0 2.0
## 3: 132.92687 5.330925 -2 4.0
## 4: 21.69879 3.493135 1 3.5
## 5: 14.83145 3.108472 1 4.5
## 6: 10.47964 2.842953 -1 6.5
head(down.lfc.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000103587 STAR 1 42.32553 HISAT2 1
## 2: ENSMUSG00000020713 STAR 2 11.98750 <NA> NA
## 3: ENSMUSG00000061769 STAR 3 14.59555 HISAT2 2
## 4: ENSMUSG00000043932 STAR 4 16.59888 HISAT2 3
## 5: ENSMUSG00000058626 STAR 5 17.88988 HISAT2 5
## 6: ENSMUSG00000030178 STAR 6 21.10503 HISAT2 4
## baseMean.y logMeanExpression RankDiff MeanRank
## 1: 41.48362 4.144203 0 1.0
## 2: NA NA NA NA
## 3: 14.27835 3.078911 1 2.5
## 4: 16.44317 3.211669 1 3.5
## 5: 17.73726 3.286853 0 5.0
## 6: 19.63052 3.431413 2 5.0
Visualizing DEG rankings
- x-axis: genes aligned by STAR
- y-axis: genes aligned by HISAT2
- Black diagonal lines: ranking in STAR = HISAT2
- Dot color: gene expression level (log-baseMean)
- 409 genes were missing in the plots
# Set a function plotting DEG rankings
ranking.plot.fn <- function(df, rankedby) {
ggplot(df,
aes(x=Rank.x, y=Rank.y, color=logMeanExpression)) + geom_point(alpha=0.5) + theme_bw() + theme(strip.text.x=element_text(size=10)) + xlab("Ranking in STAR") + ylab("Ranking in HISAT2") + geom_abline(slope=1, color="black", size=0.5) + ggtitle(paste("Gene Ranking by", rankedby)) + scale_color_gradient(low="blue", high="red")
}
# Plot rankings by ranking type
ranking.plot.fn(fdr.rankdiff, "FDR")

ranking.plot.fn(lfc.rankdiff, "Log2FoldChange")

ranking.plot.fn(up.lfc.rankdiff, "Log2FoldChange (Increase)")

ranking.plot.fn(down.lfc.rankdiff, "Log2FoldChange (Decrease)")

Visualizing DEG ranks II: Rank difference
- x-axis: expression level (log-baseMean)
- y-axis: rank difference (STAR ranking - HISAT2 ranking)
- Black horizontal lines: ranking in STAR = HISAT2
- Dot color: average ranking
- 409 genes were missing in the plots
# Set a function plotting DEG rank difference
rankdiff.plot.fn <- function(df, rankedby, ylim) {
ggplot(df, aes(x=logMeanExpression, y=RankDiff, color=MeanRank)) +
geom_point(alpha=0.5) +
theme_bw() +
ylab("Rank Difference (STAR - HISAT2)") +
ggtitle(paste("Rank Difference (STAR - HISAT2)\n in", rankedby)) +
geom_hline(yintercept=0, color="black", size=0.5) + scale_color_gradient(low="blue", high="red") +
ylim(-ylim, ylim)
}
# Display the plots in the same y-scale
rankdiff.plot.fn(fdr.rankdiff, "FDR", 1100)

rankdiff.plot.fn(lfc.rankdiff, "Log2FoldChange", 1100)

rankdiff.plot.fn(up.lfc.rankdiff, "Log2FoldChange (Increase)", 1100)

rankdiff.plot.fn(down.lfc.rankdiff, "Log2FoldChange (Decrease)", 1100)

# Display the plots in free y-scale
rankdiff.plot.fn(fdr.rankdiff, "FDR (free y-scale)", 1100)

rankdiff.plot.fn(lfc.rankdiff, "Log2FoldChange (free y-scale)", 500)

rankdiff.plot.fn(up.lfc.rankdiff, "Log2FoldChange (Increase, free y-scale)", 150)

rankdiff.plot.fn(down.lfc.rankdiff, "Log2FoldChange (Decrease, free y-scale)", 250)

Distribution of rank difference
# Create a list storing rankdiff data frames
rankList <- list(fdr.rankdiff,
lfc.rankdiff,
up.lfc.rankdiff,
down.lfc.rankdiff)
# Assine result column as a factor with levels
rankdiff.levels <- c("FDR",
"log2FoldChange",
"log2FoldChange.Increase",
"log2FoldChange.Decrease")
# Name the list
names(rankList) <- rankdiff.levels
# Create a new data frame storing rank difference by result type
rankdiff.dist <- data.frame(FDR=abs(rankList[[1]]$RankDiff),
log2FoldChange=abs(rankList[[2]]$RankDiff),
log2FoldChange.Increase=abs(rankList[[3]]$RankDiff),
log2FoldChange.Decrease=abs(rankList[[4]]$RankDiff)) %>% gather(Result, RankDiff)
rankdiff.dist$Result <- factor(rankdiff.dist$Result, levels=rankdiff.levels)
# Plot distribution of absolute rank difference
ggplot(rankdiff.dist,
aes(x=Result, y=RankDiff, color=Result)) +
geom_jitter(alpha=0.5) +
geom_boxplot(alpha=0.5, fill="grey", color="black") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
ggtitle("Distribution of Absolute Rank Difference without Shrinkage \n (STAR - HISAT2)") +
ylab("Absolute Rank Difference (STAR - HISAT2)")

Relationship between rank difference and number of transcript versions
- y-axis: abs(TPM-Count inputs)
- x-axis: number of transcripts (number of transcript id / gene id)
- dot color: mean rank
# Create a data frame storing the number of transcripts by gene id
AnnoDb.ntrans <- AnnoDb %>%
group_by(ENSEMBL) %>%
summarize(num.trans=n())
# Set a function adding the number of transcripts by gene id
add.ntrans.fn <- function(df) {
left_join(df, AnnoDb.ntrans, by=c("GENEID"="ENSEMBL"))}
# Add a column indicating the number of transcripts by gene id to every rankdiff data frame
for (x in rankdiff.levels) {
rankList[[x]] <- add.ntrans.fn(rankList[[x]])
}
# Explore the edited data frames
summary(rankList)
## Length Class Mode
## FDR 11 data.table list
## log2FoldChange 11 data.table list
## log2FoldChange.Increase 11 data.table list
## log2FoldChange.Decrease 11 data.table list
head(rankList[[1]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000026463 STAR 1 1562.6504 HISAT2 1
## 2: ENSMUSG00000022018 STAR 2 686.0206 HISAT2 2
## 3: ENSMUSG00000018008 STAR 3 1295.3734 HISAT2 3
## 4: ENSMUSG00000047180 STAR 4 1053.6006 HISAT2 4
## 5: ENSMUSG00000020689 STAR 5 2430.9495 HISAT2 5
## 6: ENSMUSG00000040498 STAR 6 763.7041 HISAT2 41
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 1537.4785 7.754220 0 1.0 NA
## 2: 667.0280 6.927102 0 2.0 1
## 3: 1252.9425 7.561041 0 3.0 13
## 4: 1046.9434 7.363325 0 4.0 2
## 5: 2385.4226 8.195240 0 5.0 2
## 6: 743.7298 7.034889 -35 23.5 4
head(rankList[[2]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000103587 STAR 1 42.32553 HISAT2 1
## 2: ENSMUSG00000020713 STAR 2 11.98750 <NA> NA
## 3: ENSMUSG00000017002 STAR 3 12.81535 HISAT2 2
## 4: ENSMUSG00000061769 STAR 4 14.59555 HISAT2 3
## 5: ENSMUSG00000025929 STAR 5 41.21467 HISAT2 4
## 6: ENSMUSG00000043932 STAR 6 16.59888 HISAT2 5
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 41.48362 4.144203 0 1.0 NA
## 2: NA NA NA NA 1
## 3: 12.65005 2.951800 1 2.5 NA
## 4: 14.27835 3.078911 1 3.5 1
## 5: 41.06315 4.123033 1 4.5 1
## 6: 16.44317 3.211669 1 5.5 1
head(rankList[[3]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000017002 STAR 1 12.81535 HISAT2 1
## 2: ENSMUSG00000025929 STAR 2 41.21467 HISAT2 2
## 3: ENSMUSG00000017897 STAR 3 140.16560 HISAT2 5
## 4: ENSMUSG00000037129 STAR 4 22.03950 HISAT2 3
## 5: ENSMUSG00000034115 STAR 5 14.97109 HISAT2 4
## 6: ENSMUSG00000040653 STAR 6 11.92657 HISAT2 7
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 12.65005 2.951800 0 1.0 NA
## 2: 41.06315 4.123033 0 2.0 1
## 3: 132.92687 5.330925 -2 4.0 NA
## 4: 21.69879 3.493135 1 3.5 1
## 5: 14.83145 3.108472 1 4.5 NA
## 6: 10.47964 2.842953 -1 6.5 2
head(rankList[[4]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000103587 STAR 1 42.32553 HISAT2 1
## 2: ENSMUSG00000020713 STAR 2 11.98750 <NA> NA
## 3: ENSMUSG00000061769 STAR 3 14.59555 HISAT2 2
## 4: ENSMUSG00000043932 STAR 4 16.59888 HISAT2 3
## 5: ENSMUSG00000058626 STAR 5 17.88988 HISAT2 5
## 6: ENSMUSG00000030178 STAR 6 21.10503 HISAT2 4
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 41.48362 4.144203 0 1.0 NA
## 2: NA NA NA NA 1
## 3: 14.27835 3.078911 1 2.5 1
## 4: 16.44317 3.211669 1 3.5 1
## 5: 17.73726 3.286853 0 5.0 NA
## 6: 19.63052 3.431413 2 5.0 NA
# Set a function plotting rank difference vs number of transcripts
rank.ntrans.plot.fn <- function(df, title) {
ggplot(df, aes(x=num.trans, y=abs(RankDiff), color=MeanRank)) +
geom_jitter(alpha=0.5) +
theme_bw() +
ggtitle(paste("Rank Difference vs Number of Alternative Transcripts \n in", title)) +
xlab("Number of Alternative Transcripts") +
ylab("Absolute Rank Difference \n (STAR - HISAT2)") + scale_color_gradient(low="blue", high="red")
}
# Print the plots
rank.ntrans.plot.fn(rankList[[1]], "FDR")

rank.ntrans.plot.fn(rankList[[2]], "log2FoldChange")

rank.ntrans.plot.fn(rankList[[3]], "log2FoldChange (Increase)")

rank.ntrans.plot.fn(rankList[[4]], "log2FoldChange (Decrease)")

Finding genes having large difference in rankings
# Initialize a list storing rankdiff genes
large.rankdiff <- rankList
# Assign a vector storing minimum (thresholds) rankdiff for filtering large rankdiff genes
rankdiff.thr <- c(10, 10, 10, 10)
names(rankdiff.thr) <- rankdiff.levels
for (x in rankdiff.levels) {
# Filter out observations below the rankdiff thresholds
large.rankdiff[[x]] <- subset(rankList[[x]],
abs(RankDiff) > rankdiff.thr[x]) %>%
# Arrange by descending order of RankDiff
arrange(desc(abs(RankDiff)))
}
# Explore the filtered genes
summary(large.rankdiff)
## Length Class Mode
## FDR 11 data.table list
## log2FoldChange 11 data.table list
## log2FoldChange.Increase 11 data.table list
## log2FoldChange.Decrease 11 data.table list
dim(large.rankdiff[[rankdiff.levels[1]]])
## [1] 1086 11
dim(large.rankdiff[[rankdiff.levels[2]]])
## [1] 847 11
dim(large.rankdiff[[rankdiff.levels[3]]])
## [1] 727 11
dim(large.rankdiff[[rankdiff.levels[4]]])
## [1] 1124 11
head(large.rankdiff[[rankdiff.levels[1]]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000062518 STAR 1461 362.34316 HISAT2 496
## 2: ENSMUSG00000079410 STAR 346 158.20395 HISAT2 1224
## 3: ENSMUSG00000073490 STAR 519 71.15374 HISAT2 1376
## 4: ENSMUSG00000096904 STAR 1349 217.61001 HISAT2 572
## 5: ENSMUSG00000021091 STAR 1408 22.77447 HISAT2 696
## 6: ENSMUSG00000079391 STAR 580 117.17884 HISAT2 1284
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 209.61909 6.146656 965 978.5 1
## 2: 118.83935 5.382767 -878 785.0 1
## 3: 55.27287 4.592998 -857 947.5 NA
## 4: 169.03497 5.710849 777 960.5 2
## 5: 18.84633 3.471893 712 1052.0 1
## 6: 95.12945 5.104390 -704 932.0 NA
head(large.rankdiff[[rankdiff.levels[2]]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000062518 STAR 1055 362.34316 HISAT2 586
## 2: ENSMUSG00000078498 STAR 902 272.90445 HISAT2 678
## 3: ENSMUSG00000093898 STAR 305 82.54365 HISAT2 499
## 4: ENSMUSG00000095661 STAR 748 284.40779 HISAT2 578
## 5: ENSMUSG00000096793 STAR 572 208.60475 HISAT2 414
## 6: ENSMUSG00000096904 STAR 586 217.61001 HISAT2 429
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 209.6191 6.146656 469 820.5 1
## 2: 144.3794 5.843817 224 790.0 2
## 3: 105.9320 4.909043 -194 402.0 1
## 4: 185.7112 5.932944 170 663.0 2
## 5: 144.8256 5.638417 158 493.0 8
## 6: 169.0350 5.710849 157 507.5 2
head(large.rankdiff[[rankdiff.levels[3]]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000062518 STAR 1002 362.34316 HISAT2 1171
## 2: ENSMUSG00000093898 STAR 1353 82.54365 HISAT2 1209
## 3: ENSMUSG00000079410 STAR 1367 158.20395 HISAT2 1261
## 4: ENSMUSG00000073490 STAR 1396 71.15374 HISAT2 1303
## 5: ENSMUSG00000019817 STAR 1376 623.16765 HISAT2 1294
## 6: ENSMUSG00000028218 STAR 1030 834.59246 HISAT2 956
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 209.61909 6.146656 -169 1086.5 1
## 2: 105.93203 4.909043 144 1281.0 1
## 3: 118.83935 5.382767 106 1314.0 1
## 4: 55.27287 4.592998 93 1349.5 NA
## 5: 556.37094 6.803897 82 1335.0 NA
## 6: 777.50650 7.109345 74 993.0 NA
head(large.rankdiff[[rankdiff.levels[4]]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y
## 1: ENSMUSG00000062518 STAR 461 362.34316 HISAT2 243
## 2: ENSMUSG00000084817 STAR 784 6016.81813 HISAT2 676
## 3: ENSMUSG00000078498 STAR 393 272.90445 HISAT2 287
## 4: ENSMUSG00000115852 STAR 1032 328.77715 HISAT2 932
## 5: ENSMUSG00000093898 STAR 110 82.54365 HISAT2 205
## 6: ENSMUSG00000060467 STAR 786 6440.19834 HISAT2 697
## baseMean.y logMeanExpression RankDiff MeanRank num.trans
## 1: 209.6191 6.146656 218 352.0 1
## 2: 4790.6378 9.037431 108 730.0 NA
## 3: 144.3794 5.843817 106 340.0 2
## 4: 266.1622 6.135258 100 982.0 NA
## 5: 105.9320 4.909043 -95 157.5 1
## 6: 4669.4365 9.079653 89 741.5 NA
# Save the filtered and arranged data frames as csv files
write.csv(large.rankdiff[[rankdiff.levels[1]]], "DE_FDR_RankDiff.csv")
write.csv(large.rankdiff[[rankdiff.levels[2]]], "DE_LFC_RankDiff.csv")
write.csv(large.rankdiff[[rankdiff.levels[3]]], "DE_Increase_LFC_RankDiff.csv")
write.csv(large.rankdiff[[rankdiff.levels[4]]], "DE_Decrease_LFC_RankDiff.csv")
Summarizing up/down DEGs with an upset plot
red bar: aligner
blue bar: directionality of gene expression change
# Generate a data frame storing upset input variables
upset.dataframe <- subset(lfc.dataframe, !is.na(padj)) %>%
mutate(Up=ifelse(FDR == paste("<", alpha) & log2FoldChange > 0, GENEID, ""),
Down=ifelse(FDR == paste("<", alpha) & log2FoldChange < 0, GENEID, ""),
Unchanged=ifelse(FDR == paste(">", alpha), GENEID, ""),
Salmon=ifelse(Alignment == "Salmon", GENEID, ""),
STAR=ifelse(Alignment == "STAR", GENEID, ""),
HISAT2=ifelse(Alignment == "HISAT2", GENEID, ""))
# Generate a list input
upset.input <- list(Up=upset.dataframe$Up,
Down=upset.dataframe$Down,
Unchanged=upset.dataframe$Unchanged,
Salmon=upset.dataframe$Salmon,
STAR=upset.dataframe$STAR,
HISAT2=upset.dataframe$HISAT2)
# Create an upset plot
upset(fromList(upset.input),
sets=names(upset.input), # What group to display
sets.x.label="Number of Genes per Group",
order.by="freq",
point.size=3,
sets.bar.color=c("red", "red", "blue", "red", "blue", "blue"),
text.scale = 1.5, number.angles=30)

Session Info
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.1 LTS
##
## Matrix products: default
## BLAS/LAPACK: /home/mira/miniconda3/envs/r/lib/libopenblasp-r0.3.10.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] AnnotationDbi_1.52.0 UpSetR_1.4.0
## [3] DESeq2_1.30.0 SummarizedExperiment_1.20.0
## [5] Biobase_2.50.0 MatrixGenerics_1.2.0
## [7] matrixStats_0.57.0 GenomicRanges_1.42.0
## [9] GenomeInfoDb_1.26.2 IRanges_2.24.0
## [11] S4Vectors_0.28.1 Rsubread_2.4.0
## [13] tximport_1.18.0 AnnotationHub_2.22.0
## [15] BiocFileCache_1.14.0 dbplyr_2.0.0
## [17] BiocGenerics_0.36.0 pheatmap_1.0.12
## [19] rmarkdown_2.5 forcats_0.5.0
## [21] stringr_1.4.0 dplyr_1.0.2
## [23] purrr_0.3.4 readr_1.4.0
## [25] tidyr_1.1.2 tibble_3.0.4
## [27] ggplot2_3.3.2 tidyverse_1.3.0
## [29] data.table_1.13.4
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-0 ellipsis_0.3.1
## [3] XVector_0.30.0 fs_1.5.0
## [5] rstudioapi_0.13 farver_2.0.3
## [7] bit64_4.0.5 interactiveDisplayBase_1.28.0
## [9] fansi_0.4.1 lubridate_1.7.9.2
## [11] xml2_1.3.2 splines_4.0.3
## [13] geneplotter_1.68.0 knitr_1.30
## [15] jsonlite_1.7.2 broom_0.7.2
## [17] annotate_1.68.0 shiny_1.5.0
## [19] BiocManager_1.30.10 compiler_4.0.3
## [21] httr_1.4.2 backports_1.2.1
## [23] assertthat_0.2.1 Matrix_1.2-18
## [25] fastmap_1.0.1 cli_2.2.0
## [27] later_1.1.0.1 htmltools_0.5.0
## [29] tools_4.0.3 gtable_0.3.0
## [31] glue_1.4.2 GenomeInfoDbData_1.2.4
## [33] rappdirs_0.3.1 Rcpp_1.0.5
## [35] cellranger_1.1.0 vctrs_0.3.5
## [37] xfun_0.19 ps_1.5.0
## [39] rvest_0.3.6 mime_0.9
## [41] lifecycle_0.2.0 XML_3.99-0.5
## [43] zlibbioc_1.36.0 scales_1.1.1
## [45] hms_0.5.3 promises_1.1.1
## [47] RColorBrewer_1.1-2 yaml_2.2.1
## [49] curl_4.3 memoise_1.1.0
## [51] gridExtra_2.3 stringi_1.5.3
## [53] RSQLite_2.2.1 BiocVersion_3.12.0
## [55] genefilter_1.72.0 BiocParallel_1.24.1
## [57] rlang_0.4.9 pkgconfig_2.0.3
## [59] bitops_1.0-6 evaluate_0.14
## [61] lattice_0.20-41 labeling_0.4.2
## [63] bit_4.0.4 tidyselect_1.1.0
## [65] plyr_1.8.6 magrittr_2.0.1
## [67] R6_2.5.0 generics_0.1.0
## [69] DelayedArray_0.16.0 DBI_1.1.0
## [71] pillar_1.4.7 haven_2.3.1
## [73] withr_2.3.0 survival_3.2-7
## [75] RCurl_1.98-1.2 modelr_0.1.8
## [77] crayon_1.3.4 locfit_1.5-9.4
## [79] grid_4.0.3 readxl_1.3.1
## [81] blob_1.2.1 reprex_0.3.0
## [83] digest_0.6.27 xtable_1.8-4
## [85] httpuv_1.5.4 munsell_0.5.0